{- git-annex import from remotes
-
- - Copyright 2019-2021 Joey Hess <id@joeyh.name>
+ - Copyright 2019-2023 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
import Messages.Progress
import Utility.DataUnits
import Utility.Metered
+import Utility.Hash (sha1s)
import Logs.Export
import Logs.Location
import Logs.PreferredContent
import qualified Data.Set as S
import qualified System.FilePath.Posix.ByteString as Posix
import qualified System.FilePath.ByteString as P
+import qualified Data.ByteArray.Encoding as BA
{- Configures how to build an import tree. -}
data ImportTreeConfig
-> Maybe TopFilePath
-> ImportableContentsChunkable Annex (Either Sha Key)
-> Annex (History Sha)
-buildImportTrees basetree msubdir (ImportableContentsComplete importable) = do
+buildImportTrees = buildImportTreesGeneric convertImportTree
+
+convertImportTree :: Maybe TopFilePath -> [(ImportLocation, Either Sha Key)] -> Annex Tree
+convertImportTree msubdir ls = treeItemsToTree <$> mapM mktreeitem ls
+ where
+ mktreeitem (loc, v) = case v of
+ Right k -> do
+ relf <- fromRepo $ fromTopFilePath topf
+ symlink <- calcRepo $ gitAnnexLink relf k
+ linksha <- hashSymlink symlink
+ return $ TreeItem treepath (fromTreeItemType TreeSymlink) linksha
+ Left sha ->
+ return $ TreeItem treepath (fromTreeItemType TreeFile) sha
+ where
+ lf = fromImportLocation loc
+ treepath = asTopFilePath lf
+ topf = asTopFilePath $
+ maybe lf (\sd -> getTopFilePath sd P.</> lf) msubdir
+
+{- Builds a history of git trees using ContentIdentifiers.
+ -
+ - These are not the final trees that are generated by the import, which
+ - use Keys. The purpose of these trees is to allow quickly determining
+ - which files in the import have changed, and which are unchanged, to
+ - avoid needing to look up the Keys for unchanged ContentIdentifiers.
+ - When the import has a large number of files, that can be slow.
+ -}
+buildContentIdentifierTree
+ :: ImportableContentsChunkable Annex (ContentIdentifier, ByteSize)
+ -> Annex (History Sha)
+buildContentIdentifierTree =
+ buildImportTreesGeneric convertContentIdentifierTree emptyTree Nothing
+
+{- For speed, and to avoid bloating the repository, the ContentIdentifiers
+ - are not actually checked into git, instead a sha1 hash is calculated
+ - internally.
+ -}
+convertContentIdentifierTree
+ :: Maybe TopFilePath
+ -> [(ImportLocation, (ContentIdentifier, ByteSize))]
+ -> Annex Tree
+convertContentIdentifierTree _ ls = pure $ treeItemsToTree $ map mktreeitem ls
+ where
+ mktreeitem (loc, ((ContentIdentifier cid), _sz)) =
+ TreeItem p mode sha1
+ where
+ p = asTopFilePath (fromImportLocation loc)
+ mode = fromTreeItemType TreeFile
+ -- Note that this hardcodes sha1, even if git has started
+ -- defaulting to some other checksum method. That should be
+ -- ok, hopefully. This checksum never needs to be verified
+ -- by git, which is why this does not bother to prefix the
+ -- cid with its length, like git would.
+ sha1 = Ref $ BA.convertToBase BA.Base16 $ sha1s cid
+
+buildImportTreesGeneric
+ :: (Maybe TopFilePath -> [(ImportLocation, v)] -> Annex Tree)
+ -> Ref
+ -> Maybe TopFilePath
+ -> ImportableContentsChunkable Annex v
+ -> Annex (History Sha)
+buildImportTreesGeneric converttree basetree msubdir (ImportableContentsComplete importable) = do
repo <- Annex.gitRepo
- withMkTreeHandle repo $ buildImportTrees' basetree msubdir importable
-buildImportTrees basetree msubdir importable@(ImportableContentsChunked {}) = do
+ withMkTreeHandle repo $ buildImportTreesGeneric' converttree basetree msubdir importable
+buildImportTreesGeneric converttree basetree msubdir importable@(ImportableContentsChunked {}) = do
repo <- Annex.gitRepo
withMkTreeHandle repo $ \hdl ->
History
<$> go hdl
- <*> buildImportTreesHistory basetree msubdir
+ <*> buildImportTreesHistory converttree basetree msubdir
(importableHistoryComplete importable) hdl
where
go hdl = do
let fullprefix = asTopFilePath $ case msubdir of
Nothing -> subdir
Just d -> getTopFilePath d Posix.</> subdir
- Tree ts <- convertImportTree (Just fullprefix) $
+ Tree ts <- converttree (Just fullprefix) $
map (\(p, i) -> (mkImportLocation p, i))
(importableContentsSubTree c)
-- Record this subtree before getting next chunk, this
Nothing -> return (Tree (tc:l))
Just c' -> gochunks (tc:l) c' hdl
-buildImportTrees'
- :: Ref
+buildImportTreesGeneric'
+ :: (Maybe TopFilePath -> [(ImportLocation, v)] -> Annex Tree)
+ -> Ref
-> Maybe TopFilePath
- -> ImportableContents (Either Sha Key)
+ -> ImportableContents v
-> MkTreeHandle
-> Annex (History Sha)
-buildImportTrees' basetree msubdir importable hdl = History
- <$> buildImportTree basetree msubdir (importableContents importable) hdl
- <*> buildImportTreesHistory basetree msubdir (importableHistory importable) hdl
+buildImportTreesGeneric' converttree basetree msubdir importable hdl = History
+ <$> buildImportTree converttree basetree msubdir (importableContents importable) hdl
+ <*> buildImportTreesHistory converttree basetree msubdir (importableHistory importable) hdl
buildImportTree
- :: Ref
+ :: (Maybe TopFilePath -> [(ImportLocation, v)] -> Annex Tree)
+ -> Ref
-> Maybe TopFilePath
- -> [(ImportLocation, Either Sha Key)]
+ -> [(ImportLocation, v)]
-> MkTreeHandle
-> Annex Sha
-buildImportTree basetree msubdir ls hdl = do
- importtree <- liftIO . recordTree' hdl =<< convertImportTree msubdir ls
+buildImportTree converttree basetree msubdir ls hdl = do
+ importtree <- liftIO . recordTree' hdl =<< converttree msubdir ls
graftImportTree basetree msubdir importtree hdl
graftImportTree
Just subdir -> inRepo $ \repo ->
graftTree' tree subdir basetree repo hdl
-convertImportTree :: Maybe TopFilePath -> [(ImportLocation, Either Sha Key)] -> Annex Tree
-convertImportTree msubdir ls = treeItemsToTree <$> mapM mktreeitem ls
- where
- mktreeitem (loc, v) = case v of
- Right k -> do
- relf <- fromRepo $ fromTopFilePath topf
- symlink <- calcRepo $ gitAnnexLink relf k
- linksha <- hashSymlink symlink
- return $ TreeItem treepath (fromTreeItemType TreeSymlink) linksha
- Left sha ->
- return $ TreeItem treepath (fromTreeItemType TreeFile) sha
- where
- lf = fromImportLocation loc
- treepath = asTopFilePath lf
- topf = asTopFilePath $
- maybe lf (\sd -> getTopFilePath sd P.</> lf) msubdir
-
buildImportTreesHistory
- :: Ref
+ :: (Maybe TopFilePath -> [(ImportLocation, v)] -> Annex Tree)
+ -> Ref
-> Maybe TopFilePath
- -> [ImportableContents (Either Sha Key)]
+ -> [ImportableContents v]
-> MkTreeHandle
-> Annex (S.Set (History Sha))
-buildImportTreesHistory basetree msubdir history hdl = S.fromList
- <$> mapM (\ic -> buildImportTrees' basetree msubdir ic hdl) history
+buildImportTreesHistory converttree basetree msubdir history hdl = S.fromList
+ <$> mapM (\ic -> buildImportTreesGeneric' converttree basetree msubdir ic hdl) history
canImportKeys :: Remote -> Bool -> Bool
canImportKeys remote importcontent =
-> ImportableContentsChunkable Annex (ContentIdentifier, ByteSize)
-> Annex (Maybe (ImportableContentsChunkable Annex (Either Sha Key)))
importKeys remote importtreeconfig importcontent thirdpartypopulated importablecontents = do
+ _ts <- buildContentIdentifierTree importablecontents
+ -- TODO use above
unless (canImportKeys remote importcontent) $
giveup "This remote does not support importing without downloading content."
-- This map is used to remember content identifiers that